#!/usr/bin/env bash
#
# Launches run.py with the llama31 backend on the "test" data split using the
# "beam" algorithm, with Phi3 conversation templating for the Q model.
#
# Usage: <this script> [extra run.py arguments...]
#
# Any arguments given to this script are appended to the run.py command line
# after the flags listed below.
#
# Notes:
#   - CUDA_VISIBLE_DEVICES=5 is set only in the environment of the python
#     process started here.
#   - The model paths below are absolute paths under /home/zhaiyuanzhao and
#     must exist on the machine running this script.
#   - The --log value points into the relative directory logs/.
#     NOTE(review): whether run.py creates that directory is not visible
#     here -- confirm, or create it before launching.
#   - "$@" is quoted so that forwarded arguments containing spaces or glob
#     characters reach run.py unchanged, one argument per word.
CUDA_VISIBLE_DEVICES=5 python run.py \
    --backend llama31 \
    --data_split test \
    --n_generate_sample 5 \
    --n_evaluate_sample 1 \
    --prompt_sample cot \
    --temperature 1.0 \
    --iterations 1 \
    --save_path trajectories-llama31-Phi3Q_epoch2 \
    --log logs/eval-llama31-Phi3Q-beam_epoch2.log \
    --max_depth 10 \
    --algorithm beam \
    --enable_fastchat_conv \
    --enable_seq_mode \
    --conv_template llama-3 \
    --q_model_conv_template phi3 \
    --policy_model_name_or_path /home/zhaiyuanzhao/FastChat/checkpoints-webshop-Phi-1_5-StepLevelVerifier-Phi3-iteration1/epoch2-chosen \
    --reference_model_name_or_path /home/zhaiyuanzhao/llm/phi-1_5 \
    "$@"